Do social distancing measures - now in effect in most of the US - work? This notebook uses a mobility index based on aggregated phone location data as a proxy for how well social distancing is practiced. It plots the mean daily growth in COVID-19 cases in a county against the mean mobility index in that county at an earlier time.
It's interactive: scroll to the bottom to play around with the parameters that feed into the plot.
import numpy as np
import pandas as pd
Load data from the continuously updated sources (New York Times and Descartes Labs). Run this cell to get the latest datasets.
# Continuously updated upstream CSVs: county-level case counts (New York Times)
# and the mobility index (Descartes Labs).
nyt_us_counties_url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
dl_us_mobility_url = "https://raw.githubusercontent.com/descarteslabs/DL-COVID-19/master/DL-us-mobility-daterow.csv"

# Both frames are indexed by date and then sorted chronologically. The later
# `.loc[start:end]` date slicing and the day-over-day `shift(1)` growth
# calculation both rely on rows being in date order, which the upstream files
# do not promise. A stable sort (mergesort) keeps the original relative order
# of rows that share a date. `fips` is read as a string so it is not parsed
# as a number.
us_counties = (
    pd.read_csv(
        nyt_us_counties_url,
        dtype={"fips": str},
        parse_dates=["date"],
    )
    .set_index("date")
    .sort_index(kind="mergesort")
)
mobility = (
    pd.read_csv(
        dl_us_mobility_url,
        dtype={"fips": str},
        parse_dates=["date"],
    )
    .set_index("date")
    .sort_index(kind="mergesort")
)
Implement the meat of the logic, aggregating and joining the two datasets and plotting the result.
from datetime import timedelta
from matplotlib.ticker import AutoMinorLocator, MultipleLocator
from textwrap import wrap
def aggregate(
    us_state="New York",
    min_cases_threshold=20,
    pandemic_growth_timespan=("2020-03-30", "2020-04-05"),
    mobility_timespan=("2020-03-16", "2020-03-22"),
):
    """Join mean daily case growth with mean mobility index per county.

    Reads the module-level ``us_counties`` and ``mobility`` frames (both
    indexed by date).

    Args:
        us_state: State name; matched against ``us_counties["state"]`` and
            ``mobility["admin1"]``.
        min_cases_threshold: Counties whose highest case count inside the
            selected window is below this value are dropped.
        pandemic_growth_timespan: ``(start, end)`` dates (inclusive) over
            which the daily case growth is averaged.
        mobility_timespan: ``(start, end)`` dates (inclusive) over which the
            mobility index is averaged.

    Returns:
        A DataFrame with one row per county FIPS code present in both
        datasets, holding the columns ``county``, ``mean_daily_growth`` (in
        percent) and ``m50_index``.
    """
    # We select one earlier day from the NYT data so we can calculate the
    # growth for the first day in the given timespan.
    interval_start = pd.Timestamp(pandemic_growth_timespan[0]) - timedelta(days=1)
    us_counties_timespan = us_counties.loc[interval_start:pandemic_growth_timespan[1]]
    mobility_in_timespan = mobility.loc[slice(*mobility_timespan)]

    us_counties_for_state = us_counties_timespan[
        us_counties_timespan["state"] == us_state
    ]
    # admin_level == 2 keeps only the rows the code treats as county-level.
    mobility_for_state = mobility_in_timespan[
        (mobility_in_timespan["admin_level"] == 2)
        & (mobility_in_timespan["admin1"] == us_state)
    ]

    def mean_daily_growth(series):
        # Day-over-day relative change; the first row has no predecessor and
        # yields NaN, which Series.mean() skips.
        return (series / series.shift(1) - 1).mean() * 100

    # Rows with a missing fips are left out by groupby's default NaN-key
    # handling.
    us_counties_mean_growth = (
        us_counties_for_state[["fips", "cases", "county"]]
        .groupby("fips")
        .aggregate(
            max_cases=pd.NamedAgg(column="cases", aggfunc="max"),
            mean_daily_growth=pd.NamedAgg(column="cases", aggfunc=mean_daily_growth),
            # Positional access: `x[0]` on a date-indexed Series relies on a
            # deprecated integer-position fallback.
            county=pd.NamedAgg(column="county", aggfunc=lambda names: names.iloc[0]),
        )
    )

    # Average only the column that is needed. Averaging the whole frame also
    # hits the string columns, which pandas >= 2.0 rejects with a TypeError.
    mobility_mean = mobility_for_state.groupby("fips")["m50_index"].mean()

    has_enough_cases = us_counties_mean_growth["max_cases"] >= min_cases_threshold
    return pd.merge(
        us_counties_mean_growth[has_enough_cases][["county", "mean_daily_growth"]],
        mobility_mean,
        on="fips",
    )
def aggregate_and_plot(
    us_state="New York",
    min_cases_threshold=20,
    pandemic_growth_timespan=("2020-03-30", "2020-04-05"),
    mobility_timespan=("2020-03-16", "2020-03-22"),
):
    """Scatter-plot mean daily case growth against mean mobility index.

    Calls ``aggregate`` with the given arguments, draws one labelled point per
    county and, when the data allows it, a first-degree least-squares fit line.

    Args:
        us_state: State name, forwarded to ``aggregate``.
        min_cases_threshold: Minimum case count per county, forwarded to
            ``aggregate``.
        pandemic_growth_timespan: ``(start, end)`` dates for the y axis.
        mobility_timespan: ``(start, end)`` dates for the x axis.

    Returns:
        None. If no county has finite values for both axes, a short message
        is printed and no figure is created.
    """
    aggregated_data = aggregate(
        us_state=us_state,
        min_cases_threshold=min_cases_threshold,
        pandemic_growth_timespan=pandemic_growth_timespan,
        mobility_timespan=mobility_timespan,
    )

    # Rows with NaN/inf on either axis cannot be drawn and would break the
    # least-squares fit, so they are dropped up front.
    if not aggregated_data.empty:
        is_finite = np.isfinite(
            aggregated_data["m50_index"].astype(float)
        ) & np.isfinite(aggregated_data["mean_daily_growth"].astype(float))
        aggregated_data = aggregated_data[is_finite]
    if aggregated_data.empty:
        print(f"No counties in {us_state} match the current selection; nothing to plot.")
        return

    ax = aggregated_data.plot.scatter(
        x="m50_index",
        y="mean_daily_growth",
        s=40,
        c="#090",
        figsize=(9, 9),
        zorder=1000,
    )
    for row in aggregated_data.itertuples():
        # Small offset so the label does not sit on top of its marker.
        ax.text(
            x=row.m50_index + 0.2,
            y=row.mean_daily_growth + 0.2,
            s=row.county,
            fontsize=10,
            c="#090",
            zorder=1000,
        )

    # A straight-line fit needs at least two distinct x values.
    if aggregated_data["m50_index"].nunique() >= 2:
        slope, intercept = np.polyfit(
            aggregated_data["m50_index"],
            aggregated_data["mean_daily_growth"],
            deg=1,
        )
        x_min = aggregated_data["m50_index"].min()
        x_max = aggregated_data["m50_index"].max()
        # Extend the line 5% past the outermost points on both sides.
        margin = (x_max - x_min) * 0.05
        fit_x = np.linspace(x_min - margin, x_max + margin, 100)
        ax.plot(fit_x, slope * fit_x + intercept, c="#000")

    # The caption only names the dates actually selected; it no longer
    # hard-codes a specific week.
    caption = (
        "The association between the mean DL mobility index "
        f"(from {mobility_timespan[0]} to {mobility_timespan[1]}) "
        "and the mean daily growth rate "
        f"(from {pandemic_growth_timespan[0]} to {pandemic_growth_timespan[1]}) "
        f"in {us_state} state counties "
        f"with at least {min_cases_threshold} COVID-19 cases by {pandemic_growth_timespan[1]}"
    )
    # The negative y places the title text below the axes, as a caption.
    ax.set_title(
        "\n".join(wrap(caption, 80)),
        y=-0.27, x=-0.08, loc="left", fontweight="bold", wrap=True,
    )
    ax.set_ylabel("Mean daily growth rate (%)")
    ax.set_xlabel("Mean DL mobility index (%)")
    ax.xaxis.set_major_locator(MultipleLocator(10))
    ax.yaxis.set_major_locator(MultipleLocator(10))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.grid(which='major', color='#eee', linewidth=2)
    ax.grid(which='minor', color='#eee')
Define widgets that will be used for the interactive plot.
from ipywidgets import interact, Layout
import ipywidgets as widgets
def _date_options(index):
    """Return each calendar day from index.min() to index.max() as 'YYYY-MM-DD'."""
    every_day = pd.date_range(index.min(), index.max())
    return [day.strftime("%Y-%m-%d") for day in every_day]


# Only states that appear in both datasets can be plotted.
us_states = sorted(
    set(us_counties["state"].unique()).intersection(mobility["admin1"].unique())
)

# One slider option per day covered by the respective dataset.
pandemic_growth_timespan_options = _date_options(us_counties.index)
mobility_index_timespan_options = _date_options(mobility.index)

# Layout and style keyword arguments shared by every widget below.
styling = dict(
    layout=Layout(width='600px'),
    style={'description_width': 'initial'},
)

us_state_widget = widgets.Dropdown(
    options=us_states,
    value="New York",
    description="US state",
    **styling,
)

min_cases_threshold_widget = widgets.IntText(
    value=50,
    description="Min cases per county",
    **styling,
)

# continuous_update=False: the value is only pushed once the handle is released.
pandemic_growth_timespan_widget = widgets.SelectionRangeSlider(
    options=pandemic_growth_timespan_options,
    value=("2020-03-30", "2020-04-05"),
    description="Pandemic growth timespan (y axis)",
    continuous_update=False,
    **styling,
)

mobility_timespan_widget = widgets.SelectionRangeSlider(
    options=mobility_index_timespan_options,
    value=("2020-03-16", "2020-03-22"),
    description="Mobility index timespan (x axis)",
    continuous_update=False,
    **styling,
)
Plot with input from interactive widgets. Use the widgets to update the plot in real time.
(The plot will only show, and the interaction will only work, if you are viewing this in a live Jupyter runtime. GitHub's static notebook viewer, for example, does not render it.)
# Map each keyword argument of aggregate_and_plot to the widget that supplies it.
plot_controls = {
    "us_state": us_state_widget,
    "min_cases_threshold": min_cases_threshold_widget,
    "pandemic_growth_timespan": pandemic_growth_timespan_widget,
    "mobility_timespan": mobility_timespan_widget,
}
interact(aggregate_and_plot, **plot_controls)
interactive(children=(Dropdown(description='US state', index=31, layout=Layout(width='600px'), options=('Alaba…
<function __main__.aggregate_and_plot(us_state='New York', min_cases_threshold=20, pandemic_growth_timespan=('2020-03-30', '2020-04-05'), mobility_timespan=('2020-03-16', '2020-03-22'))>